package crawl;

import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

/**
 * 〈合并爬取的数据并去重〉
 *
 * @author shiweixian
 * @date 2018/3/11
 * @since 1.0.0
 */
public class DataMerger {
    public static void main(String[] args) {
        try {
            CsvReader genReader = new CsvReader("d:\\genBank.csv", ',', Charset.forName("UTF-8"));
            CsvReader uniReader = new CsvReader("d:\\uniport.csv", ',', Charset.forName("UTF-8"));
            CsvWriter resultWriter = new CsvWriter("d:\\acidResult.csv", ',', Charset.forName("UTF-8"));
            // 跳过表头 如果需要表头的话，这句可以忽略
            genReader.readHeaders();
            // 保存uniport数据
            ArrayList<String> uniList = new ArrayList<String>();
            while (uniReader.readRecord()) {
                resultWriter.writeRecord(uniReader.getValues());
                uniList.add(uniReader.getValues()[1]);
            }
            while (genReader.readRecord()) {
                String[] values = genReader.getValues();
                if (!uniList.contains(values[1])) {
                    resultWriter.writeRecord(values);
                }
            }
            uniReader.close();
            genReader.close();
            resultWriter.flush();
            resultWriter.close();
            System.out.println("合并完成");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}